
# function to load the RData
# Load an .RData file and return the single object stored in it.
#
# Args:
#   fileName: path to an .RData file that holds exactly one (non-hidden)
#             object.
#
# Returns:
#   The object stored in the file, whatever its name inside the file was.
#
# Stops with an error when the file holds zero or several visible objects.
loadRData <- function(fileName) {
  # Load into a private environment so that a stored object that happens to
  # be called "fileName" cannot overwrite (and then be confused with) this
  # function's own argument.
  env <- new.env(parent = emptyenv())
  load(fileName, envir = env)

  # ls() skips dot-prefixed names (e.g. .Random.seed), so such hidden objects
  # do not count towards the "exactly one object" requirement.
  obj_names <- ls(env)
  if (length(obj_names) != 1L) {
    stop(
      "Expected exactly one object in '", fileName, "', found ",
      length(obj_names), ".",
      call. = FALSE
    )
  }

  get(obj_names, envir = env, inherits = FALSE)
}

####################################################
# 1. Systematic bias: increasing the number of cyber events
####################################################

#################################
# 1.1. Start with adding COs (randomly)

# increases the number of cyber events
# by simulating additional cyber events:
# Add extra cyber events at random and rebuild the lagged cyber indicator.
#
# Args:
#   data_cyb: cyber data; must contain the `outcome` column plus DYAD and YEAR.
#   data_mil: military data; must contain DYAD, YEAR and CYBER_LAG_NA.
#   outcome:  name of the outcome column in `data_cyb` (default "OUTCOME").
#   p0:       rate of additional events relative to the current sum of the
#             outcome column; .1 = 10%.
#
# Returns:
#   `data_cyb` and `data_mil` stacked with bind_rows(), with the rows for
#   YEAR 2000 removed. NOTE: `data_cyb` is still grouped by DYAD when it is
#   stacked; call ungroup() on the result if an ungrouped frame is needed.
simulate_extra_events_random <- function(data_cyb, data_mil,
                                         outcome = "OUTCOME",
                                         p0 = .1) {
  Y <- data_cyb[[outcome]]

  # rows that currently have no cyber event (outcome equal to 0)
  idx_no_cyb_events <- which(Y == 0)

  # number of events to add; never more than there are rows available to flip
  n_events <- min(
    floor(p0 * sum(Y, na.rm = TRUE)),
    length(idx_no_cyb_events)
  )

  # Index through sample.int() instead of calling sample(x, n) directly:
  # sample() treats a length-one numeric x as 1:x, which would pick an
  # arbitrary row when only one candidate row is left.
  idx_new_events <- idx_no_cyb_events[
    sample.int(length(idx_no_cyb_events), n_events, replace = FALSE)
  ]
  Y[idx_new_events] <- 1
  data_cyb[[outcome]] <- Y

  # Rebuild the lag from the column named by `outcome` (not a hard-coded
  # OUTCOME). The lag follows the existing row order within each DYAD.
  data_cyb <- data_cyb %>%
    dplyr::group_by(DYAD) %>%
    dplyr::mutate(CYBER_LAG_NA = dplyr::lag(.data[[outcome]]))

  # update cyber lags in the military data:
  data_mil <- data_mil %>%
    dplyr::select(-CYBER_LAG_NA) %>%
    dplyr::left_join(
      data_cyb %>% dplyr::select(DYAD, YEAR, CYBER_LAG_NA),
      by = c("DYAD", "YEAR")
    )

  # return the whole dataset without year 2000:
  dplyr::bind_rows(data_cyb, data_mil) %>%
    dplyr::filter(YEAR != 2000)
}


############################################################
# 1.2. simulating COs from the model:

# Add extra cyber events whose probabilities come from a fitted model, then
# rebuild the lagged cyber indicator.
#
# Args:
#   data_cyb: cyber data; must contain the `outcome` column, DYAD, YEAR and
#             whatever predict(model, newdata = data_cyb) needs.
#   data_mil: military data; must contain DYAD, YEAR and CYBER_LAG_NA.
#   outcome:  name of the outcome column in `data_cyb` (default "OUTCOME").
#   model:    fitted model passed to predict(..., type = "response"). The
#             default `mod` must exist outside this function.
#   p_base:   probability assigned to a row whose logit equals the mean
#             logit of the non-event rows.
#   reverse:  if TRUE, simulate against the model using p = 1 - p.
#
# Returns:
#   `data_cyb` (with added PREDICTION and p0 columns) and `data_mil` stacked
#   with bind_rows(), with the rows for YEAR 2000 removed. NOTE: `data_cyb`
#   is still grouped by DYAD when it is stacked; call ungroup() on the result
#   if an ungrouped frame is needed.
simulate_extra_events_from_model <- function(data_cyb, data_mil,
                                             outcome = "OUTCOME",
                                             model = mod,
                                             p_base = .1,
                                             # if reverse is TRUE, we simulate against our model using p = 1 - p
                                             reverse = FALSE) {
  p <- predict(model, type = "response", newdata = data_cyb)
  # PREDICTION keeps the raw model prediction (before any reversal)
  data_cyb$PREDICTION <- p
  if (reverse == TRUE) {
    p <- 1 - p
  }

  Y <- data_cyb[[outcome]]
  idx_no_cyb_events <- which(Y == 0)

  # Year-2000 rows take over the year-2001 probabilities by position.
  # NOTE(review): this assumes both years hold the same rows in the same
  # order (presumably one row per DYAD) -- confirm against the input data.
  idx_2000 <- which(data_cyb$YEAR == 2000)
  idx_2001 <- which(data_cyb$YEAR == 2001)
  p[idx_2000] <- p[idx_2001]

  # Centre the logits of the non-event rows and rescale: plogis(0) = 0.5, so
  # a row at the mean logit ends up with probability p_base.
  p0 <- p[idx_no_cyb_events]
  p0 <- plogis(qlogis(p0) - mean(qlogis(p0), na.rm = TRUE)) * p_base / 0.5
  # With p_base > 0.5 the rescaled value can exceed 1, which rbinom() would
  # turn into NA; cap it at 1 instead.
  p0 <- pmin(p0, 1)

  # draw the latent (unobserved) events for the rows without an event
  latent_obs <- rbinom(length(idx_no_cyb_events), size = 1, prob = p0)
  Y[idx_no_cyb_events] <- latent_obs
  data_cyb[[outcome]] <- Y

  # keep the simulation probabilities; NA for rows that already had an event
  data_cyb$p0 <- NA
  data_cyb$p0[idx_no_cyb_events] <- p0

  # Rebuild the lag from the column named by `outcome` (not a hard-coded
  # OUTCOME). The lag follows the existing row order within each DYAD.
  data_cyb <- data_cyb %>%
    dplyr::group_by(DYAD) %>%
    dplyr::mutate(CYBER_LAG_NA = dplyr::lag(.data[[outcome]]))

  # update cyber lags in the military data:
  data_mil <- data_mil %>%
    dplyr::select(-CYBER_LAG_NA) %>%
    dplyr::left_join(
      data_cyb %>% dplyr::select(DYAD, YEAR, CYBER_LAG_NA),
      by = c("DYAD", "YEAR")
    )

  # return the whole dataset without year 2000:
  dplyr::bind_rows(data_cyb, data_mil) %>%
    dplyr::filter(YEAR != 2000)
}

###################################################
# 2. Conditional bias
###################################################

###################################################
# 2.1. Events that are generated by the model 
# predictions (military/cyber conflict; int_users)
###################################################

# Add extra cyber events with model-driven probabilities that additionally
# depend on a binary covariate, then rebuild the lagged cyber indicator.
#
# Args:
#   data_cyb: cyber data; must contain the `outcome` and `variable` columns,
#             DYAD, YEAR and whatever predict(model, newdata = data_cyb)
#             needs.
#   data_mil: military data; must contain DYAD, YEAR and CYBER_LAG_NA.
#   outcome:  name of the outcome column in `data_cyb` (default "OUTCOME").
#   variable: name of the conditioning column (default "CONVENTIONAL"); rows
#             where it equals 1 use `p01`, all other rows use `p00`.
#   model:    fitted model passed to predict(..., type = "response"). The
#             default `mod` must exist outside this function.
#   p00:      multiplier for rows where `variable` is not 1.
#   p01:      multiplier for rows where `variable` is 1.
#   reverse:  if TRUE, simulate against the model using p = 1 - p.
#
# Returns:
#   `data_cyb` (with an added PREDICTION column) and `data_mil` stacked with
#   bind_rows(), with the rows for YEAR 2000 removed. NOTE: `data_cyb` is
#   still grouped by DYAD when it is stacked; call ungroup() on the result if
#   an ungrouped frame is needed.
simulate_extra_events_conditional <- function(data_cyb, data_mil,
                                              outcome = "OUTCOME",
                                              variable = "CONVENTIONAL",
                                              model = mod,
                                              p00 = .2,
                                              p01 = .1,
                                              # if reverse is TRUE, we simulate against our model using p = 1 - p
                                              reverse = FALSE) {
  p <- predict(model, type = "response", newdata = data_cyb)
  # PREDICTION keeps the raw model prediction (before any reversal)
  data_cyb$PREDICTION <- p
  if (reverse == TRUE) {
    p <- 1 - p
  }

  Y <- data_cyb[[outcome]]
  idx_no_cyb_events <- which(Y == 0)

  # Year-2000 rows take over the year-2001 probabilities by position.
  # NOTE(review): this assumes both years hold the same rows in the same
  # order (presumably one row per DYAD) -- confirm against the input data.
  idx_2000 <- which(data_cyb$YEAR == 2000)
  idx_2001 <- which(data_cyb$YEAR == 2001)
  p[idx_2000] <- p[idx_2001]

  # Centre the logits of the non-event rows, then scale by 0.5.
  # NOTE(review): the previous comment here said "will have mean 1", but
  # plogis(0) * 0.5 is 0.25 at the mean logit; a value of 1 there would need
  # "/ 0.5" (as simulate_extra_events_from_model does). The factor is left
  # unchanged so existing results do not shift -- confirm which was intended.
  p0 <- p[idx_no_cyb_events]
  p0 <- plogis(qlogis(p0) - mean(qlogis(p0), na.rm = TRUE)) * 0.5

  # rows with variable == 1 are scaled by p01, all others by p00
  var_nocyb <- data_cyb[[variable]][idx_no_cyb_events] == 1
  p0 <- p0 * ifelse(var_nocyb, p01, p00)

  # draw the latent (unobserved) events for the rows without an event
  latent_obs <- rbinom(length(idx_no_cyb_events), size = 1, prob = p0)
  Y[idx_no_cyb_events] <- latent_obs
  data_cyb[[outcome]] <- Y

  # Rebuild the lag from the column named by `outcome` (not a hard-coded
  # OUTCOME). The lag follows the existing row order within each DYAD.
  data_cyb <- data_cyb %>%
    dplyr::group_by(DYAD) %>%
    dplyr::mutate(CYBER_LAG_NA = dplyr::lag(.data[[outcome]]))

  # update cyber lags in the military data:
  data_mil <- data_mil %>%
    dplyr::select(-CYBER_LAG_NA) %>%
    dplyr::left_join(
      data_cyb %>% dplyr::select(DYAD, YEAR, CYBER_LAG_NA),
      by = c("DYAD", "YEAR")
    )

  # return the whole dataset without year 2000:
  dplyr::bind_rows(data_cyb, data_mil) %>%
    dplyr::filter(YEAR != 2000)
}

#####################################
# 2.2. Difference in reporting 
# between autocracies and democracies
# (events that are not generated by the model 
# predictions: reporting bias by regime
# type; US-target, US-attacker; media
# reporting in the English-speaking 
# countries)
#####################################

# Add extra cyber events at random, with separate rates for autocracies and
# democracies, then rebuild the lagged cyber indicator.
#
# Args:
#   data_cyb_aut: cyber data; must contain the `outcome` column, DEMOCRACY,
#                 DYAD and YEAR. Rows with DEMOCRACY == 0 are treated as
#                 autocracies, all other rows as democracies.
#   data_mil_aut: military data; must contain DYAD, YEAR and CYBER_LAG_NA.
#   outcome:      name of the outcome column (default "OUTCOME").
#   p0_aut:       rate of additional events among autocracies, relative to
#                 the sum of the outcome column over autocracy rows.
#   p0_dem:       same, for democracies.
#
# Returns:
#   `data_cyb_aut` and `data_mil_aut` stacked with bind_rows(), with the rows
#   for YEAR 2000 removed. NOTE: `data_cyb_aut` is still grouped by DYAD when
#   it is stacked; call ungroup() on the result if an ungrouped frame is
#   needed.
simulate_extra_events_aut <- function(data_cyb_aut, data_mil_aut,
                                      outcome = "OUTCOME",
                                      p0_aut = .2, p0_dem = .1) {
  Y <- data_cyb_aut[[outcome]]
  is_non_event <- Y == 0
  is_autocracy <- data_cyb_aut$DEMOCRACY == 0

  # Draw up to `n` row indices from `pool` without replacement. Going through
  # sample.int() avoids sample()'s special case for a length-one numeric x
  # (sample(k, 1) draws from 1:k instead of returning k).
  draw_rows <- function(pool, n) {
    pool[sample.int(length(pool), min(n, length(pool)), replace = FALSE)]
  }

  # Candidate rows are the rows WITHOUT an event in each regime type. The
  # democracy pool previously selected rows that already had an event, so
  # setting them to 1 changed nothing and p0_dem had no effect.
  pool_aut <- which(is_non_event & is_autocracy)
  pool_dem <- which(is_non_event & !is_autocracy)

  # number of events to add per regime type, based on the original outcome
  n_events_aut <- floor(p0_aut * sum(Y[which(is_autocracy)], na.rm = TRUE))
  n_events_dem <- floor(p0_dem * sum(Y[which(!is_autocracy)], na.rm = TRUE))

  # autocracies are drawn first, then democracies
  Y[draw_rows(pool_aut, n_events_aut)] <- 1
  Y[draw_rows(pool_dem, n_events_dem)] <- 1
  data_cyb_aut[[outcome]] <- Y

  # Rebuild the lag from the column named by `outcome` (not a hard-coded
  # OUTCOME). The lag follows the existing row order within each DYAD.
  data_cyb_aut <- data_cyb_aut %>%
    dplyr::group_by(DYAD) %>%
    dplyr::mutate(CYBER_LAG_NA = dplyr::lag(.data[[outcome]]))

  # update cyber lags in the military data:
  data_mil_aut <- data_mil_aut %>%
    dplyr::select(-CYBER_LAG_NA) %>%
    dplyr::left_join(
      data_cyb_aut %>% dplyr::select(DYAD, YEAR, CYBER_LAG_NA),
      by = c("DYAD", "YEAR")
    )

  # return the whole dataset without year 2000:
  dplyr::bind_rows(data_cyb_aut, data_mil_aut) %>%
    dplyr::filter(YEAR != 2000)
}




